# devtools::install_local("geneRefineR/", force = T)
# library("echolocatoR")
source("echolocatoR/R/functions.R")
library(readxl)
library(DT)
library(data.table)
library(dplyr)
library(ggplot2)
library(plotly)
library(cowplot)
library(ggrepel)
library(curl)
library(biomaRt)
# Ensembl LD API
library(httr)
library(jsonlite)
library(xml2)
library(gaston)
library(RCurl)
# *** susieR ****
# library(knitrBootstrap) #install_github('jimhester/knitrBootstrap')
library(susieR) # devtools::install_github("stephenslab/susieR")
sessionInfo()## R version 3.5.1 (2018-07-02)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: CentOS release 6.9 (Final)
##
## Matrix products: default
## BLAS/LAPACK: /hpc/packages/minerva-common/intel/parallel_studio_xe_2018/compilers_and_libraries_2018.1.163/linux/mkl/lib/intel64_lin/libmkl_gf_lp64.so
##
## locale:
## [1] C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] susieR_0.6.2.0411 tidyr_0.8.1 RCurl_1.95-4.11
## [4] bitops_1.0-6 gaston_1.5.4 RcppParallel_4.4.2
## [7] Rcpp_1.0.0 xml2_1.2.0 jsonlite_1.5
## [10] httr_1.3.1 biomaRt_2.38.0 curl_3.2
## [13] ggrepel_0.8.0 cowplot_0.9.3 plotly_4.7.1
## [16] ggplot2_3.1.0 dplyr_0.7.6 data.table_1.11.8
## [19] DT_0.4 readxl_1.1.0
##
## loaded via a namespace (and not attached):
## [1] lattice_0.20-35 prettyunits_1.0.2 assertthat_0.2.0
## [4] rprojroot_1.3-2 digest_0.6.18 R6_2.4.0
## [7] cellranger_1.1.0 plyr_1.8.4 backports_1.1.2
## [10] stats4_3.5.1 RSQLite_2.1.1 evaluate_0.11
## [13] pillar_1.3.1 rlang_0.3.1 progress_1.2.0
## [16] lazyeval_0.2.1 blob_1.1.1 S4Vectors_0.20.1
## [19] Matrix_1.2-14 rmarkdown_1.10 stringr_1.4.0
## [22] htmlwidgets_1.2 bit_1.1-14 munsell_0.5.0
## [25] compiler_3.5.1 pkgconfig_2.0.2 BiocGenerics_0.28.0
## [28] htmltools_0.3.6 tidyselect_0.2.4 expm_0.999-2
## [31] tibble_2.0.1 IRanges_2.16.0 XML_3.98-1.12
## [34] viridisLite_0.3.0 crayon_1.3.4 withr_2.1.2
## [37] grid_3.5.1 gtable_0.2.0 DBI_1.0.0
## [40] magrittr_1.5 scales_1.0.0 stringi_1.3.1
## [43] bindrcpp_0.2.2 tools_3.5.1 bit64_0.9-7
## [46] Biobase_2.42.0 glue_1.3.0 purrr_0.2.5
## [49] hms_0.4.2 parallel_3.5.1 yaml_2.1.19
## [52] AnnotationDbi_1.44.0 colorspace_1.3-2 memoise_1.1.0
## [55] knitr_1.20 bindr_0.1.1
print(paste("susieR ", packageVersion("susieR"))) ## [1] "susieR 0.6.2.411"
# MINERVA PATHS TO SUMMARY STATISTICS DATASETS:
# root <- "../../.."
root <- "~/../../../sc/orga/projects"
Data_dirs = list(
# ++++++++ GWAS SUMMARY STATS ++++++++ #
# Nall et al. (2019) w/ 23andMe
"Nalls_23andMe" = list(type="Parkinson's GWAS",
topSS="Data/Parkinsons/Nalls_23andMe/Nalls2019_TableS2.xlsx",
# fullSS=file.path(root,"pd-omics/data/nallsEtAl2019/23andme/PD_all_post30APRIL2015_5.2_extended.txt")),
fullSS=file.path(root,"pd-omics/data/nallsEtAl2019/combined_meta/nallsEtAl2019_allSamples_allVariants.mod.txt"),
reference="https://www.biorxiv.org/content/10.1101/388165v3"),
## IGAP
"IGAP" = list(type="Alzheimer's GWAS",
topSS="Data/Alzheimers/IGAP/Lambert_2019_AD_GWAS.xlsx",
fullSS=file.path(root,"ad-omics/data/AD_GWAS/IGAP/IGAP_stage_1.1000G.phase3.20130502.tsv"),
reference="https://www.nature.com/articles/ng.2802"),
## Marioni et al. (2018)
"Marioni_2018" = list(type="Alzheimer's GWAS",
topSS="Data/Alzheimers/Marioni_2018/Marioni2018_supplementary_tables.xlsm",
fullSS=file.path(root,"ad-omics/data/AD_GWAS/Marioni_2018/Marioni2018.4_UK_Biobank_IGAP_17May2018.1000G.phase3.20130502.tsv"),
reference="https://www.nature.com/articles/s41398-018-0150-6"),
## Jansen et al. (2018)
"Posthuma_2018" = list(type="Alzheimer's GWAS",
topSS="Data/Alzheimers/Posthuma_2018/AD_target_SNP.xlsx",
fullSS=file.path(root,"ad-omics/data/AD_GWAS/Posthuma_2018/phase3.beta.se.hrc.txt"),
reference="https://www.nature.com/articles/s41588-018-0311-9"),
## Kunkle et al. (2018) Alzheimer's GWAS
"Kunkle_2019" = list(type="Alzheimer's GWAS",
topSS="Data/Alzheimers/Kunkle_2019/Kunkle2019_supplementary_tables.xlsx",
fullSS=file.path(root,"ad-omics/data/AD_GWAS/Kunkle_2019/Kunkle_etal_Stage1_results.txt"),
# fullSS_stage2=file.path(root,"ad-omics/data/AD_GWAS/Kunkle_etal_Stage2_results.txt"),
reference="https://www.nature.com/articles/s41588-019-0358-2"),
# ++++++++ eQTL SUMMARY STATS ++++++++ #
## MESA eQTLs: African Americans
"MESA_AFA" = list(type="eQTL",
topSS="Data/eQTL/MESA/AFA_eQTL_PTK2B.txt",
fullSS=file.path(root,"ad-omics/data/mesa/AFA_cis_eqtl_summary_statistics.txt"),
reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa"),
## MESA eQTLs: Caucasians
"MESA_CAU" = list(type="eQTL",
topSS="Data/eQTL/MESA/CAU_eQTL_PTK2B.txt",
fullSS=file.path(root,"ad-omics/data/mesa/CAU_cis_eqtl_summary_statistics.txt"),
reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa"),
## MESA eQTLs: Hispanics
"MESA_HIS" = list(type="eQTL",
topSS="Data/eQTL/MESA/HIS_eQTL_PTK2B.txt",
fullSS=file.path(root,"ad-omics/data/mesa/HIS_cis_eqtl_summary_statistics.txt"),
reference="https://www.nhlbi.nih.gov/science/multi-ethnic-study-atherosclerosis-mesa")
)
Data_dirs_table <- Data_dirs_to_table(Data_dirs, writeCSV = "directories_table.csv")
# Direct to local data files if not working on Minerva
if(getwd()=="/Users/schilder/Desktop/Fine_Mapping"){
vcf_folder = F # Use internet if I'm working from my laptop
} else{
vcf_folder = F#"../1000_Genomes_VCFs/Phase1" # Use previously downloaded files if working on Minerva node
} top_SNPs <- import_topSNPs(
file_path = Data_dirs$Nalls_23andMe$topSS,
chrom_col = "CHR", position_col = "BP", snp_col="SNP",
pval_col="P, all studies", effect_col="Beta, all studies", gene_col="Nearest Gene",
caption= "Nalls et al. (2018) w/ 23andMe PD GWAS Summary Stats")
finemapped_PD_Nalls_23andMe <- finemap_geneList(top_SNPs, geneList=c("LRRK2"),#c("LRRK2","GBAP1","SNCA","VPS13C","GCH1"),
file_path= Data_dirs$Nalls_23andMe$fullSS,#"./LRRK2_EUR_subset.txt",#
chrom_col = "CHR", position_col = "POS", snp_col = "RSID",
pval_col = "p", effect_col = "beta", stderr_col = "se",
vcf_folder = vcf_folder, superpopulation = "EUR")Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/pd-omics/data/nallsEtAl2019/combined_meta/LRRK2_EUR_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/LRRK2_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-4819.31771667441” [1] “objective:-4817.53694272212” [1] “objective:-4817.5346354578” [1] “objective:-4817.53463247054” [1] “objective:-4817.5346324668” [1] “objective:-4817.53463246679”
Credible Set:
## Warning: Removed 1 rows containing missing values (geom_hline).
top_SNPs <- import_topSNPs(
file_path = Data_dirs$Posthuma_2018$topSS,
sheet = "Overall (phase 3)",
chrom_col = "Chr", position_col = "bp", snp_col="SNP",
pval_col="P", effect_col="Z", gene_col="Gene",
caption= "Posthuma et al. (2018) AD GWAS Summary Stats")
finemapped_AD_Posthuma <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
file_path=Data_dirs$Posthuma_2018$fullSS,#"Data/Alzheimers/Posthuma_2018/phase3.beta.se.hrc.txt",#
effect_col = "BETA", stderr_col = "SE", position_col = "BP",
snp_col = "SNP", pval_col = "P",
vcf_folder = vcf_folder, superpopulation = "EUR")Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Posthuma_2018/PTK2B_EUR_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-6157.44090915229” [1] “objective:-6157.0855671831” [1] “objective:-6157.0854402076” [1] “objective:-6157.08544016219”
Credible Set:
## Warning: Removed 1 rows containing missing values (geom_hline).
top_SNPs <- import_topSNPs(
file_path = Data_dirs$Marioni_2018$topSS,
sheet = "Table S9",
chrom_col = "topSNP_chr", position_col = "topSNP_bp", snp_col="topSNP",
pval_col="p_GWAS", effect_col="b_GWAS", gene_col="Gene",
caption= "Marioni et al. (2018) AD GWAS Summary Stats")
finemapped_AD_Marioni <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
file_path=Data_dirs$Marioni_2018$fullSS,#"Data/Alzheimers/Marioni_2018/Marioni2018.4_UK_Biobank_IGAP_17May2018.1000G.phase3.20130502.tsv", #,
effect_col = "BETA", stderr_col = "SE", position_col = "POS",
snp_col = "SNP",chrom_col = "CHROM", pval_col = "PVAL",
vcf_folder = vcf_folder, superpopulation = "EUR")Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/Marioni_2018/PTK2B_EUR_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-3136.01873136029” [1] “objective:-3134.8397689979” [1] “objective:-3134.83862729993” [1] “objective:-3134.83862618178” [1] “objective:-3134.8386261807” [1] “objective:-3134.8386261807”
Credible Set:
## Warning: Removed 1 rows containing missing values (geom_hline).
## Warning: Removed 572 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 572 rows containing missing values (geom_segment).
## Warning: Removed 7 rows containing missing values (geom_text_repel).
top_SNPs <- import_topSNPs(
file_path = Data_dirs$IGAP$topSS,
sheet = 1,
chrom_col = "CHR", position_col = "Position", snp_col="SNP",
pval_col="Overall_P", effect_col="Overall_OR", gene_col="Closest gene",
caption= "IGAP: AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )
finemapped_AD_IGAP <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
file_path=Data_dirs$IGAP$fullSS,
chrom_col = "CHROM", pval_col = "PVAL", snp_col = "SNP",
effect_col = "BETA", stderr_col = "SE", position_col = "POS",
vcf_folder = vcf_folder, num_causal = 1, superpopulation = "EUR")Extracting SNPs flanking lead SNP… Subset file already exists. Importing /hpc/users/schilb03/../../../sc/orga/projects/ad-omics/data/AD_GWAS/IGAP/PTK2B_EUR_subset.txt …
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-3041.09565605669” [1] “objective:-3040.53825357985” [1] “objective:-3040.5378253415” [1] “objective:-3040.53782501108”
Credible Set:
## Warning: Removed 1 rows containing missing values (geom_hline).
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_segment).
## Warning: Removed 8 rows containing missing values (geom_text_repel).
top_SNPs <- import_topSNPs(
file_path = Data_dirs$Kunkle_2019$topSS,
sheet = "Supplementary Table 6",
chrom_col = "Chr", position_col = "Basepair", snp_col="SNP",
pval_col="P", effect_col="Beta", gene_col="LOCUS",
caption= "Kunkle (2019): AD GWAS Summary Stats")
# genes <- snps_to_genes(snp_list=top_SNPs$SNP )
finemapped_AD_Kunkle <- finemap_geneList(top_SNPs, geneList=c("PTK2B"),
file_path="Data/Alzheimers/Kunkle_2019/Kunkle_etal_Stage2_results.txt",#Data_dirs$Kunkle_2019$fullSS,
chrom_col = "Chromosome", position_col = "Position", pval_col = "Pvalue",
snp_col = "MarkerName", effect_col = "Beta", stderr_col = "SE",
vcf_folder = vcf_folder, num_causal = 1, superpopulation = "EUR")superpopulation <- "AFA"
finemapped_eQTL_MESA.AFA <- finemap_eQTL(superpopulation, gene = "PTK2B",
fullSS_path = Data_dirs$MESA_AFA$fullSS)Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AFR_subset.txt …
## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored
Extracting SNPs flanking lead SNP… Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AFR_subset.txt … Calculating Standard Error…
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-6410.83082343629” [1] “objective:-6410.73262589551” [1] “objective:-6410.73258329645” [1] “objective:-6410.73258327796”
Credible Set:
## Warning: Removed 1 rows containing missing values (geom_hline).
superpopulation <- "CAU"
finemapped_eQTL_MESA.CAU <- finemap_eQTL(superpopulation, gene = "PTK2B",
fullSS_path = Data_dirs$MESA_CAU$fullSS)Subset file already exists. Importing Data/eQTL/MESA/PTK2B_EUR_subset.txt …
## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored
Extracting SNPs flanking lead SNP… Subset file already exists. Importing Data/eQTL/MESA/PTK2B_EUR_subset.txt … Calculating Standard Error…
Creating LD matrix… LD Reference Panel = 1KG_Phase1 Creating ‘../1000_Genomes_VCFs’ directory. Identified matching VCF subset file. Importing… ../1000_Genomes_VCFs/Phase1/PTK2B_subset.vcf ped stats and snps stats have been set. ‘p’ has been set. ‘mu’ and ‘sigma’ have been set.
Fine mapping with SusieR… [1] “objective:-3454.00148726064” [1] “objective:-3453.35330042907” [1] “objective:-3453.3528459927” [1] “objective:-3453.35284567247”
Credible Set:
## Warning: Removed 1 rows containing missing values (geom_hline).
superpopulation <- "HIS"
try({
finemapped_eQTL_MESA.HIS <- finemap_eQTL(superpopulation, gene = "PTK2B",
fullSS_path = Data_dirs$MESA_HIS$fullSS)
})Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AMR_subset.txt …
## Warning in charToRaw(enc2utf8(text)): argument should be a character vector of length 1
## all but the first element will be ignored
Extracting SNPs flanking lead SNP… Subset file already exists. Importing Data/eQTL/MESA/PTK2B_AMR_subset.txt … Calculating Standard Error…
Fine mapping with SusieR… [1] “objective:-4639.57322734461” [1] “objective:-4639.47138123614” [1] “objective:-4639.47122903255” [1] “objective:-4639.47122880477”
Credible Set:
try({
# finemapped_eQTL_MESA.AFA <- finemapped_eQTL_MESA.HIS<- finemapped_eQTL_MESA.CAU
named_results_list <- list("Nalls_23andMe"=finemapped_PD_Nalls_23andMe,
"IGAP"=finemapped_AD_IGAP,
"Posthuma_2018"=finemapped_AD_Posthuma,
"Marioni_2018"=finemapped_AD_Marioni,
"Kunkle_2019"=finemapped_AD_Kunkle,
"eQTL_MESA.AFA"=finemapped_eQTL_MESA.AFA,
"eQTL_MESA.CAU"=finemapped_eQTL_MESA.CAU,
"eQTL_MESA.HIS"=finemapped_eQTL_MESA.HIS)
merged_results <- merge_finemapping_results(named_results_list, csv_path ="merged_results.csv")
createDT(merged_results, caption = "Finemapped Credible Sets for Multiple Datasets")
table(as.character(merged_results$SNP))
})